From 0524aef5b61e1d27f5c7e7d8f22ad5ab560a89b6 Mon Sep 17 00:00:00 2001 From: Chad Horohoe Date: Mon, 25 Aug 2008 20:19:00 +0000 Subject: [PATCH] * Add all HTMLDiff-related classes to the autoloader. * Move all stuff @ingroup DifferenceEngine to /diff * Split off Node and friends to Nodes.php to make HTMLDiff.php shorter. --- includes/AutoLoader.php | 64 ++-- includes/{ => diff}/Diff.php | 0 includes/{ => diff}/DifferenceEngine.php | 0 includes/{ => diff}/HTMLDiff.php | 411 +--------------------- includes/diff/Nodes.php | 430 +++++++++++++++++++++++ 5 files changed, 477 insertions(+), 428 deletions(-) rename includes/{ => diff}/Diff.php (100%) rename includes/{ => diff}/DifferenceEngine.php (100%) rename includes/{ => diff}/HTMLDiff.php (74%) create mode 100644 includes/diff/Nodes.php diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index d781f3f468..8d01ae2aa1 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -14,7 +14,6 @@ $wgAutoloadLocalClasses = array( 'AjaxResponse' => 'includes/AjaxResponse.php', 'AlphabeticPager' => 'includes/Pager.php', 'APCBagOStuff' => 'includes/BagOStuff.php', - 'ArrayDiffFormatter' => 'includes/DifferenceEngine.php', 'Article' => 'includes/Article.php', 'AtomFeed' => 'includes/Feed.php', 'AuthPlugin' => 'includes/AuthPlugin.php', @@ -34,17 +33,7 @@ $wgAutoloadLocalClasses = array( 'CreativeCommonsRdf' => 'includes/Metadata.php', 'Credits' => 'includes/Credits.php', 'DBABagOStuff' => 'includes/BagOStuff.php', - 'DelegatingContentHandler' => 'includes/HTMLDiff.php', 'DependencyWrapper' => 'includes/CacheDependency.php', - '_DiffEngine' => 'includes/DifferenceEngine.php', - 'DifferenceEngine' => 'includes/DifferenceEngine.php', - 'DiffFormatter' => 'includes/DifferenceEngine.php', - 'Diff' => 'includes/DifferenceEngine.php', - '_DiffOp_Add' => 'includes/DifferenceEngine.php', - '_DiffOp_Change' => 'includes/DifferenceEngine.php', - '_DiffOp_Copy' => 'includes/DifferenceEngine.php', - '_DiffOp_Delete' => 
'includes/DifferenceEngine.php', - '_DiffOp' => 'includes/DifferenceEngine.php', 'DjVuImage' => 'includes/DjVuImage.php', 'DoubleReplacer' => 'includes/StringUtils.php', 'DoubleRedirectJob' => 'includes/DoubleRedirectJob.php', @@ -61,7 +50,6 @@ $wgAutoloadLocalClasses = array( 'DumpOutput' => 'includes/Export.php', 'DumpPipeOutput' => 'includes/Export.php', 'eAccelBagOStuff' => 'includes/BagOStuff.php', - 'EchoingContentHandler' => 'includes/HTMLDiff.php', 'EditPage' => 'includes/EditPage.php', 'EmaillingJob' => 'includes/EmaillingJob.php', 'EmailNotification' => 'includes/UserMailer.php', @@ -95,11 +83,8 @@ $wgAutoloadLocalClasses = array( 'HistoryBlobStub' => 'includes/HistoryBlob.php', 'HTMLCacheUpdate' => 'includes/HTMLCacheUpdate.php', 'HTMLCacheUpdateJob' => 'includes/HTMLCacheUpdate.php', - 'HTMLDiffer' => 'includes/HTMLDiff.php', 'HTMLFileCache' => 'includes/HTMLFileCache.php', - 'HTMLOutput' => 'includes/HTMLDiff.php', 'Http' => 'includes/HttpFunctions.php', - '_HWLDF_WordAccumulator' => 'includes/DifferenceEngine.php', 'ImageGallery' => 'includes/ImageGallery.php', 'ImageHistoryList' => 'includes/ImagePage.php', 'ImagePage' => 'includes/ImagePage.php', @@ -124,7 +109,6 @@ $wgAutoloadLocalClasses = array( 'MagicWordArray' => 'includes/MagicWord.php', 'MagicWord' => 'includes/MagicWord.php', 'MailAddress' => 'includes/UserMailer.php', - 'MappedDiff' => 'includes/DifferenceEngine.php', 'MathRenderer' => 'includes/Math.php', 'MediaTransformError' => 'includes/MediaTransformOutput.php', 'MediaTransformOutput' => 'includes/MediaTransformOutput.php', @@ -157,7 +141,6 @@ $wgAutoloadLocalClasses = array( 'ProtectionForm' => 'includes/ProtectionForm.php', 'QueryPage' => 'includes/QueryPage.php', 'QuickTemplate' => 'includes/SkinTemplate.php', - 'RangeDifference' => 'includes/Diff.php', 'RawPage' => 'includes/RawPage.php', 'RCCacheEntry' => 'includes/ChangesList.php', 'RdfMetaData' => 'includes/Metadata.php', @@ -196,7 +179,6 @@ $wgAutoloadLocalClasses = array( 
'SquidUpdate' => 'includes/SquidUpdate.php', 'Status' => 'includes/Status.php', 'StringUtils' => 'includes/StringUtils.php', - 'TableDiffFormatter' => 'includes/DifferenceEngine.php', 'TablePager' => 'includes/Pager.php', 'ThumbnailImage' => 'includes/MediaTransformOutput.php', 'TitleDependency' => 'includes/CacheDependency.php', @@ -205,7 +187,6 @@ $wgAutoloadLocalClasses = array( 'TitleListDependency' => 'includes/CacheDependency.php', 'TransformParameterError' => 'includes/MediaTransformOutput.php', 'TurckBagOStuff' => 'includes/BagOStuff.php', - 'UnifiedDiffFormatter' => 'includes/DifferenceEngine.php', 'UnlistedSpecialPage' => 'includes/SpecialPage.php', 'User' => 'includes/User.php', 'UserArray' => 'includes/UserArray.php', @@ -216,12 +197,10 @@ $wgAutoloadLocalClasses = array( 'WatchlistEditor' => 'includes/WatchlistEditor.php', 'WebRequest' => 'includes/WebRequest.php', 'WebResponse' => 'includes/WebResponse.php', - 'WikiDiff3' => 'includes/Diff.php', 'WikiError' => 'includes/WikiError.php', 'WikiErrorMsg' => 'includes/WikiError.php', 'WikiExporter' => 'includes/Export.php', 'WikiXmlError' => 'includes/WikiError.php', - 'WordLevelDiff' => 'includes/DifferenceEngine.php', 'XCacheBagOStuff' => 'includes/BagOStuff.php', 'XmlDumpWriter' => 'includes/Export.php', 'Xml' => 'includes/Xml.php', @@ -328,6 +307,49 @@ $wgAutoloadLocalClasses = array( 'ResultWrapper' => 'includes/db/Database.php', 'SQLiteField' => 'includes/db/DatabaseSqlite.php', + # includes/diff + 'AncestorComparator' => 'includes/diff/HTMLDiff.php', + 'AncestorComparatorResult' => 'includes/diff/HTMLDiff.php', + 'AnchorToString' => 'includes/diff/HTMLDiff.php', + 'ArrayDiffFormatter' => 'includes/diff/DifferenceEngine.php', + 'BodyNode' => 'includes/diff/Nodes.php', + 'ChangeText' => 'includes/diff/HTMLDiff.php', + 'ChangeTextGenerator' => 'includes/diff/HTMLDiff.php', + 'DelegatingContentHandler' => 'includes/diff/HTMLDiff.php', + '_DiffEngine' => 'includes/diff/DifferenceEngine.php', + 
'DifferenceEngine' => 'includes/diff/DifferenceEngine.php', + 'DiffFormatter' => 'includes/diff/DifferenceEngine.php', + 'Diff' => 'includes/diff/DifferenceEngine.php', + '_DiffOp_Add' => 'includes/diff/DifferenceEngine.php', + '_DiffOp_Change' => 'includes/diff/DifferenceEngine.php', + '_DiffOp_Copy' => 'includes/diff/DifferenceEngine.php', + '_DiffOp_Delete' => 'includes/diff/DifferenceEngine.php', + '_DiffOp' => 'includes/diff/DifferenceEngine.php', + 'DomTreeBuilder' => 'includes/diff/HTMLDiff.php', + 'DummyNode' => 'includes/diff/Nodes.php', + 'EchoingContentHandler' => 'includes/diff/HTMLDiff.php', + 'HTMLDiffer' => 'includes/diff/HTMLDiff.php', + 'HTMLOutput' => 'includes/diff/HTMLDiff.php', + '_HWLDF_WordAccumulator' => 'includes/diff/DifferenceEngine.php', + 'ImageNode' => 'includes/diff/Nodes.php', + 'LastCommonParentResult' => 'includes/diff/HTMLDiff.php', + 'MappedDiff' => 'includes/diff/DifferenceEngine.php', + 'Modification' => 'includes/diff/HTMLDiff.php', + 'NoContentTagToString' => 'includes/diff/HTMLDiff.php', + 'Node' => 'includes/diff/Nodes.php', + 'RangeDifference' => 'includes/diff/Diff.php', + 'TableDiffFormatter' => 'includes/diff/DifferenceEngine.php', + 'TagNode' => 'includes/diff/Nodes.php', + 'TagToString' => 'includes/diff/HTMLDiff.php', + 'TagToStringFactory' => 'includes/diff/HTMLDiff.php', + 'TextNode' => 'includes/diff/Nodes.php', + 'TextNodeDiffer' => 'includes/diff/HTMLDiff.php', + 'TextOnlyComparator' => 'includes/diff/HTMLDiff.php', + 'UnifiedDiffFormatter' => 'includes/diff/DifferenceEngine.php', + 'WhiteSpaceNode' => 'includes/diff/Nodes.php', + 'WikiDiff3' => 'includes/diff/Diff.php', + 'WordLevelDiff' => 'includes/diff/DifferenceEngine.php', + # includes/filerepo 'ArchivedFile' => 'includes/filerepo/ArchivedFile.php', 'File' => 'includes/filerepo/File.php', diff --git a/includes/Diff.php b/includes/diff/Diff.php similarity index 100% rename from includes/Diff.php rename to includes/diff/Diff.php diff --git 
a/includes/DifferenceEngine.php b/includes/diff/DifferenceEngine.php similarity index 100% rename from includes/DifferenceEngine.php rename to includes/diff/DifferenceEngine.php diff --git a/includes/HTMLDiff.php b/includes/diff/HTMLDiff.php similarity index 74% rename from includes/HTMLDiff.php rename to includes/diff/HTMLDiff.php index a5e94aacc5..8a162c1d36 100644 --- a/includes/HTMLDiff.php +++ b/includes/diff/HTMLDiff.php @@ -1,5 +1,6 @@ + +/** Copyright (C) 2008 Guy Van den Broeck * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -15,414 +16,10 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * or see http://www.gnu.org/ + * + * @ingroup DifferenceEngine */ -/** - * Any element in the DOM tree of an HTML document. - */ -class Node { - - public $parent; - - protected $parentTree; - - public $whiteBefore = false; - - public $whiteAfter = false; - - function __construct($parent) { - $this->parent = $parent; - } - - public function getParentTree() { - if (!isset($this->parentTree)) { - if (!is_null($this->parent)) { - $this->parentTree = $this->parent->getParentTree(); - $this->parentTree[] = $this->parent; - } else { - $this->parentTree = array(); - } - } - return $this->parentTree; - } - - public function getLastCommonParent(Node $other) { - $result = new LastCommonParentResult(); - - $myParents = $this->getParentTree(); - $otherParents = $other->getParentTree(); - - $i = 1; - $isSame = true; - $nbMyParents = count($myParents); - $nbOtherParents = count($otherParents); - while ($isSame && $i < $nbMyParents && $i < $nbOtherParents) { - if (!$myParents[$i]->openingTag === $otherParents[$i]->openingTag) { - $isSame = false; - } else { - // After a while, the index i-1 must be the last common parent - $i++; - } - } - - $result->lastCommonParentDepth = $i - 1; - $result->parent = 
$myParents[$i - 1]; - - if (!$isSame || $nbMyParents > $nbOtherParents) { - // Not all tags matched, or all tags matched but - // there are tags left in this tree - $result->indexInLastCommonParent = $myParents[$i - 1]->getIndexOf($myParents[$i]); - $result->splittingNeeded = true; - } else if ($nbMyParents <= $nbOtherParents) { - $result->indexInLastCommonParent = $myParents[$i - 1]->getIndexOf($this); - } - return $result; - } - - public function setParent($parent) { - $this->parent = $parent; - unset($this->parentTree); - } - - public function inPre() { - $tree = $this->getParentTree(); - foreach ($tree as &$ancestor) { - if ($ancestor->isPre()) { - return true; - } - } - return false; - } -} - -/** - * Node that can contain other nodes. Represents an HTML tag. - */ -class TagNode extends Node { - - public $children = array(); - - public $qName; - - public $attributes = array(); - - public $openingTag; - - function __construct($parent, $qName, /*array*/ $attributes) { - parent::__construct($parent); - $this->qName = strtolower($qName); - foreach($attributes as $key => &$value){ - $this->attributes[strtolower($key)] = $value; - } - return $this->openingTag = Xml::openElement($this->qName, $this->attributes); - } - - public function addChildAbsolute(Node $node, $index) { - array_splice($this->children, $index, 0, array($node)); - } - - public function getIndexOf(Node $child) { - // don't trust array_search with objects - foreach ($this->children as $key => &$value){ - if ($value === $child) { - return $key; - } - } - return null; - } - - public function getNbChildren() { - return count($this->children); - } - - public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) { - $nodes = array(); - - $allDeleted = false; - $somethingDeleted = false; - $hasNonDeletedDescendant = false; - - if (empty($this->children)) { - return $nodes; - } - - foreach ($this->children as &$child) { - $allDeleted_local = false; - $somethingDeleted_local = false; - 
$childrenChildren = $child->getMinimalDeletedSet($id, $allDeleted_local, $somethingDeleted_local); - if ($somethingDeleted_local) { - $nodes = array_merge($nodes, $childrenChildren); - $somethingDeleted = true; - } - if (!$allDeleted_local) { - $hasNonDeletedDescendant = true; - } - } - if (!$hasNonDeletedDescendant) { - $nodes = array($this); - $allDeleted = true; - } - return $nodes; - } - - public function splitUntil(TagNode $parent, Node $split, $includeLeft) { - $splitOccured = false; - if ($parent !== $this) { - $part1 = new TagNode(null, $this->qName, $this->attributes); - $part2 = new TagNode(null, $this->qName, $this->attributes); - $part1->setParent($this->parent); - $part2->setParent($this->parent); - - $onSplit = false; - $pastSplit = false; - foreach ($this->children as &$child) - { - if ($child === $split) { - $onSplit = true; - } - if(!$pastSplit || ($onSplit && $includeLeft)) { - $child->setParent($part1); - $part1->children[] = $child; - } else { - $child->setParent($part2); - $part2->children[] = $child; - } - if ($onSplit) { - $onSplit = false; - $pastSplit = true; - } - } - $myindexinparent = $this->parent->getIndexOf($this); - if (!empty($part1->children)) { - $this->parent->addChildAbsolute($part1, $myindexinparent); - } - if (!empty($part2->children)) { - $this->parent->addChildAbsolute($part2, $myindexinparent); - } - if (!empty($part1->children) && !empty($part2->children)) { - $splitOccured = true; - } - - $this->parent->removeChild($myindexinparent); - - if ($includeLeft) { - $this->parent->splitUntil($parent, $part1, $includeLeft); - } else { - $this->parent->splitUntil($parent, $part2, $includeLeft); - } - } - return $splitOccured; - - } - - private function removeChild($index) { - unset($this->children[$index]); - $this->children = array_values($this->children); - } - - public static $blocks = array('html', 'body','p','blockquote', 'h1', - 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'ul', 'ol', 'li', 'table', - 'tbody', 'tr', 'td', 'th', 
'br'); - - public function copyTree() { - $newThis = new TagNode(null, $this->qName, $this->attributes); - $newThis->whiteBefore = $this->whiteBefore; - $newThis->whiteAfter = $this->whiteAfter; - foreach ($this->children as &$child) { - $newChild = $child->copyTree(); - $newChild->setParent($newThis); - $newThis->children[] = $newChild; - } - return $newThis; - } - - public function getMatchRatio(TagNode $other) { - $txtComp = new TextOnlyComparator($other); - return $txtComp->getMatchRatio(new TextOnlyComparator($this)); - } - - public function expandWhiteSpace() { - $shift = 0; - $spaceAdded = false; - - $nbOriginalChildren = $this->getNbChildren(); - for ($i = 0; $i < $nbOriginalChildren; ++$i) { - $child = $this->children[$i + $shift]; - - if ($child instanceof TagNode) { - if (!$child->isPre()) { - $child->expandWhiteSpace(); - } - } - if (!$spaceAdded && $child->whiteBefore) { - $ws = new WhiteSpaceNode(null, ' ', $child->getLeftMostChild()); - $ws->setParent($this); - $this->addChildAbsolute($ws,$i + ($shift++)); - } - if ($child->whiteAfter) { - $ws = new WhiteSpaceNode(null, ' ', $child->getRightMostChild()); - $ws->setParent($this); - $this->addChildAbsolute($ws,$i + 1 + ($shift++)); - $spaceAdded = true; - } else { - $spaceAdded = false; - } - - } - } - - public function getLeftMostChild() { - if (empty($this->children)) { - return $this; - } - return $this->children[0]->getLeftMostChild(); - } - - public function getRightMostChild() { - if (empty($this->children)) { - return $this; - } - return $this->children[$this->getNbChildren() - 1]->getRightMostChild(); - } - - public function isPre() { - return 0 == strcasecmp($this->qName,'pre'); - } - - public static function toDiffLine(TagNode $node) { - return $node->openingTag; - } -} - -/** - * Represents a piece of text in the HTML file. 
- */ -class TextNode extends Node { - - public $text; - - public $modification; - - function __construct($parent, $text) { - parent::__construct($parent); - $this->modification = new Modification(Modification::NONE); - $this->text = $text; - } - - public function copyTree() { - $clone = clone $this; - $clone->setParent(null); - return $clone; - } - - public function getLeftMostChild() { - return $this; - } - - public function getRightMostChild() { - return $this; - } - - public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) { - if ($this->modification->type == Modification::REMOVED - && $this->modification->id == $id){ - $somethingDeleted = true; - $allDeleted = true; - return array($this); - } - return array(); - } - - public function isSameText($other) { - if (is_null($other) || ! $other instanceof TextNode) { - return false; - } - return str_replace('\n', ' ',$this->text) === str_replace('\n', ' ',$other->text); - } - - public static function toDiffLine(TextNode $node) { - return str_replace('\n', ' ',$node->text); - } -} - -class WhiteSpaceNode extends TextNode { - - function __construct($parent, $s, Node $like = null) { - parent::__construct($parent, $s); - if(!is_null($like) && $like instanceof TextNode) { - $newModification = clone $like->modification; - $newModification->firstOfID = false; - $this->modification = $newModification; - } - } -} - -/** - * Represents the root of a HTML document. 
- */ -class BodyNode extends TagNode { - - function __construct() { - parent::__construct(null, 'body', array()); - } - - public function copyTree() { - $newThis = new BodyNode(); - foreach ($this->children as &$child) { - $newChild = $child->copyTree(); - $newChild->setParent($newThis); - $newThis->children[] = $newChild; - } - return $newThis; - } - - public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) { - $nodes = array(); - foreach ($this->children as &$child) { - $childrenChildren = $child->getMinimalDeletedSet($id, - $allDeleted, $somethingDeleted); - $nodes = array_merge($nodes, $childrenChildren); - } - return $nodes; - } - -} - -/** - * Represents an image in HTML. Even though images do not contain any text they - * are independent visible objects on the page. They are logically a TextNode. - */ -class ImageNode extends TextNode { - - public $attributes; - - function __construct(TagNode $parent, /*array*/ $attrs) { - if(!array_key_exists('src', $attrs)) { - HTMLDiffer::diffDebug( "Image without a source\n" ); - parent::__construct($parent, ''); - }else{ - parent::__construct($parent, '' . strtolower($attrs['src']) . ''); - } - $this->attributes = $attrs; - } - - public function isSameText($other) { - if (is_null($other) || ! $other instanceof ImageNode) { - return false; - } - return $this->text === $other->text; - } - -} - -class DummyNode extends Node { - - function __construct() { - // no op - } - -} - /** * When detecting the last common parent of two nodes, all results are stored as * a LastCommonParentResult. 
diff --git a/includes/diff/Nodes.php b/includes/diff/Nodes.php new file mode 100644 index 0000000000..0f0dbdb376 --- /dev/null +++ b/includes/diff/Nodes.php @@ -0,0 +1,430 @@ + + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * or see http://www.gnu.org/ + * + * @ingroup DifferenceEngine + */ + +/** + * Any element in the DOM tree of an HTML document. 
+ */ +class Node { + + public $parent; + + protected $parentTree; + + public $whiteBefore = false; + + public $whiteAfter = false; + + function __construct($parent) { + $this->parent = $parent; + } + + public function getParentTree() { + if (!isset($this->parentTree)) { + if (!is_null($this->parent)) { + $this->parentTree = $this->parent->getParentTree(); + $this->parentTree[] = $this->parent; + } else { + $this->parentTree = array(); + } + } + return $this->parentTree; + } + + public function getLastCommonParent(Node $other) { + $result = new LastCommonParentResult(); + + $myParents = $this->getParentTree(); + $otherParents = $other->getParentTree(); + + $i = 1; + $isSame = true; + $nbMyParents = count($myParents); + $nbOtherParents = count($otherParents); + while ($isSame && $i < $nbMyParents && $i < $nbOtherParents) { + if ($myParents[$i]->openingTag !== $otherParents[$i]->openingTag) { // was "!$a === $b": "!" binds tighter than "===", so the test could never succeed + $isSame = false; + } else { + // After a while, the index i-1 must be the last common parent + $i++; + } + } + + $result->lastCommonParentDepth = $i - 1; + $result->parent = $myParents[$i - 1]; + + if (!$isSame || $nbMyParents > $nbOtherParents) { + // Not all tags matched, or all tags matched but + // there are tags left in this tree + $result->indexInLastCommonParent = $myParents[$i - 1]->getIndexOf($myParents[$i]); + $result->splittingNeeded = true; + } else if ($nbMyParents <= $nbOtherParents) { + $result->indexInLastCommonParent = $myParents[$i - 1]->getIndexOf($this); + } + return $result; + } + + public function setParent($parent) { + $this->parent = $parent; + unset($this->parentTree); + } + + public function inPre() { + $tree = $this->getParentTree(); + foreach ($tree as $ancestor) { + if ($ancestor->isPre()) { + return true; + } + } + return false; + } +} + +/** + * Node that can contain other nodes. Represents an HTML tag. 
+ */ +class TagNode extends Node { + + public $children = array(); + + public $qName; + + public $attributes = array(); + + public $openingTag; + + function __construct($parent, $qName, /*array*/ $attributes) { + parent::__construct($parent); + $this->qName = strtolower($qName); + foreach($attributes as $key => $value){ + $this->attributes[strtolower($key)] = $value; + } + $this->openingTag = Xml::openElement($this->qName, $this->attributes); + } + + public function addChildAbsolute(Node $node, $index) { + array_splice($this->children, $index, 0, array($node)); + } + + public function getIndexOf(Node $child) { + // don't trust array_search with objects + foreach ($this->children as $key => $value){ + if ($value === $child) { + return $key; + } + } + return null; + } + + public function getNbChildren() { + return count($this->children); + } + + public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) { + $nodes = array(); + + $allDeleted = false; + $somethingDeleted = false; + $hasNonDeletedDescendant = false; + + if (empty($this->children)) { + return $nodes; + } + + foreach ($this->children as $child) { + $allDeleted_local = false; + $somethingDeleted_local = false; + $childrenChildren = $child->getMinimalDeletedSet($id, $allDeleted_local, $somethingDeleted_local); + if ($somethingDeleted_local) { + $nodes = array_merge($nodes, $childrenChildren); + $somethingDeleted = true; + } + if (!$allDeleted_local) { + $hasNonDeletedDescendant = true; + } + } + if (!$hasNonDeletedDescendant) { + $nodes = array($this); + $allDeleted = true; + } + return $nodes; + } + + public function splitUntil(TagNode $parent, Node $split, $includeLeft) { + $splitOccured = false; + if ($parent !== $this) { + $part1 = new TagNode(null, $this->qName, $this->attributes); + $part2 = new TagNode(null, $this->qName, $this->attributes); + $part1->setParent($this->parent); + $part2->setParent($this->parent); + + $onSplit = false; + $pastSplit = false; + foreach 
($this->children as $child) + { + if ($child === $split) { + $onSplit = true; + } + if(!$pastSplit || ($onSplit && $includeLeft)) { + $child->setParent($part1); + $part1->children[] = $child; + } else { + $child->setParent($part2); + $part2->children[] = $child; + } + if ($onSplit) { + $onSplit = false; + $pastSplit = true; + } + } + $myindexinparent = $this->parent->getIndexOf($this); + if (!empty($part1->children)) { + $this->parent->addChildAbsolute($part1, $myindexinparent); + } + if (!empty($part2->children)) { + $this->parent->addChildAbsolute($part2, $myindexinparent); + } + if (!empty($part1->children) && !empty($part2->children)) { + $splitOccured = true; + } + + $this->parent->removeChild($myindexinparent); // NOTE(review): the parts were inserted at this same index, so $this has presumably shifted right; confirm this removes $this and not a part + + if ($includeLeft) { + $this->parent->splitUntil($parent, $part1, $includeLeft); + } else { + $this->parent->splitUntil($parent, $part2, $includeLeft); + } + } + return $splitOccured; + + } + + private function removeChild($index) { + unset($this->children[$index]); + $this->children = array_values($this->children); + } + + public static $blocks = array('html', 'body','p','blockquote', 'h1', + 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'ul', 'ol', 'li', 'table', + 'tbody', 'tr', 'td', 'th', 'br'); + + public function copyTree() { + $newThis = new TagNode(null, $this->qName, $this->attributes); + $newThis->whiteBefore = $this->whiteBefore; + $newThis->whiteAfter = $this->whiteAfter; + foreach ($this->children as $child) { + $newChild = $child->copyTree(); + $newChild->setParent($newThis); + $newThis->children[] = $newChild; + } + return $newThis; + } + + public function getMatchRatio(TagNode $other) { + $txtComp = new TextOnlyComparator($other); + return $txtComp->getMatchRatio(new TextOnlyComparator($this)); + } + + public function expandWhiteSpace() { + $shift = 0; + $spaceAdded = false; + + $nbOriginalChildren = $this->getNbChildren(); + for ($i = 0; $i < $nbOriginalChildren; ++$i) { + $child = $this->children[$i + $shift]; + + if ($child instanceof 
TagNode) { + if (!$child->isPre()) { + $child->expandWhiteSpace(); + } + } + if (!$spaceAdded && $child->whiteBefore) { + $ws = new WhiteSpaceNode(null, ' ', $child->getLeftMostChild()); + $ws->setParent($this); + $this->addChildAbsolute($ws,$i + ($shift++)); + } + if ($child->whiteAfter) { + $ws = new WhiteSpaceNode(null, ' ', $child->getRightMostChild()); + $ws->setParent($this); + $this->addChildAbsolute($ws,$i + 1 + ($shift++)); + $spaceAdded = true; + } else { + $spaceAdded = false; + } + + } + } + + public function getLeftMostChild() { + if (empty($this->children)) { + return $this; + } + return $this->children[0]->getLeftMostChild(); + } + + public function getRightMostChild() { + if (empty($this->children)) { + return $this; + } + return $this->children[$this->getNbChildren() - 1]->getRightMostChild(); + } + + public function isPre() { + return 0 == strcasecmp($this->qName,'pre'); + } + + public static function toDiffLine(TagNode $node) { + return $node->openingTag; + } +} + +/** + * Represents a piece of text in the HTML file. + */ +class TextNode extends Node { + + public $text; + + public $modification; + + function __construct($parent, $text) { + parent::__construct($parent); + $this->modification = new Modification(Modification::NONE); + $this->text = $text; + } + + public function copyTree() { + $clone = clone $this; + $clone->setParent(null); + return $clone; + } + + public function getLeftMostChild() { + return $this; + } + + public function getRightMostChild() { + return $this; + } + + public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) { + if ($this->modification->type == Modification::REMOVED + && $this->modification->id == $id){ + $somethingDeleted = true; + $allDeleted = true; + return array($this); + } + return array(); + } + + public function isSameText($other) { + if (is_null($other) || ! 
$other instanceof TextNode) { + return false; + } + return str_replace("\n", ' ',$this->text) === str_replace("\n", ' ',$other->text); // double quotes: a single-quoted \n is a literal backslash followed by n, not a newline + } + + public static function toDiffLine(TextNode $node) { + return str_replace("\n", ' ',$node->text); // newlines become spaces so the node text stays on one diff line + } +} + +class WhiteSpaceNode extends TextNode { + + function __construct($parent, $s, Node $like = null) { + parent::__construct($parent, $s); + if(!is_null($like) && $like instanceof TextNode) { + $newModification = clone $like->modification; + $newModification->firstOfID = false; + $this->modification = $newModification; + } + } +} + +/** + * Represents the root of a HTML document. + */ +class BodyNode extends TagNode { + + function __construct() { + parent::__construct(null, 'body', array()); + } + + public function copyTree() { + $newThis = new BodyNode(); + foreach ($this->children as $child) { + $newChild = $child->copyTree(); + $newChild->setParent($newThis); + $newThis->children[] = $newChild; + } + return $newThis; + } + + public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) { + $nodes = array(); + foreach ($this->children as $child) { + $childrenChildren = $child->getMinimalDeletedSet($id, + $allDeleted, $somethingDeleted); + $nodes = array_merge($nodes, $childrenChildren); + } + return $nodes; + } + +} + +/** + * Represents an image in HTML. Even though images do not contain any text they + * are independent visible objects on the page. They are logically a TextNode. + */ +class ImageNode extends TextNode { + + public $attributes; + + function __construct(TagNode $parent, /*array*/ $attrs) { + if(!array_key_exists('src', $attrs)) { + HTMLDiffer::diffDebug( "Image without a source\n" ); + parent::__construct($parent, ''); + }else{ + parent::__construct($parent, '' . strtolower($attrs['src']) . ''); + } + $this->attributes = $attrs; + } + + public function isSameText($other) { + if (is_null($other) || ! 
$other instanceof ImageNode) { + return false; + } + return $this->text === $other->text; + } + +} + +/** + * No-op node + */ +class DummyNode extends Node { + + function __construct() { + // no op + } + +} -- 2.20.1